library(tidyverse)
library(lubridate) 
library(here)
library(kableExtra)
library(broom)
library(janitor)
library(ggridges)
library(patchwork)
library(latex2exp)
library(knitr)
library(modelsummary)
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, fig.width = 6.5)
theme_set(theme_minimal())
all_na <- function(x) any(!is.na(x))
# knitr::opts_chunk$set(fig.pos = "H", out.extra = "") # to control position of figures


apsrcolors2 = c("#646768", "#afd4e0")
apsrcolors2_inv = c("#afd4e0", "#646768")


# modal = read_csv("https://healthdata.gov/api/views/a8v3-a3m3/rows.csv?accessType=DOWNLOAD") %>%
#   clean_names()
modal = read_csv(here("data", "School_Learning_Modalities__2020-2021.csv")) %>%
  clean_names()
# modal1 = read_csv("https://healthdata.gov/api/views/aitj-yx37/rows.csv?accessType=DOWNLOAD") %>%
#   clean_names()
modal1 = read_csv(here("data","School_Learning_Modalities__2021-2022.csv")) %>%
  clean_names()

modal = modal %>%
  bind_rows(modal1)
# modal = unique(modal)
# colnames(modal)
modal_w = modal %>%
  filter(!is.na(week)) %>%
  group_by(week, learning_modality) %>%
  summarize(counts = sum(student_count, na.rm = TRUE))
modal_w = modal_w %>%
  group_by(week) %>%
  mutate(week = mdy_hms(week), 
         week = as.Date(week),
         learning_modality = as_factor(learning_modality), 
         learning_modality = fct_relevel(learning_modality, "Remote", "Hybrid", "In Person"), 
         prop = counts/sum(counts, na.rm = TRUE),
         ) %>%
  ungroup() %>%
  arrange(week)

in_person50 = modal_w %>%
  filter(learning_modality == "In Person") %>%
  arrange(week)
# tmp = modal_w %>%
#   filter(week > "2021-04-01" & week < "2021-10-01")
in_person50 = in_person50 %>%
  filter(prop > 0.5) %>%
  slice(1)
# in_person50


apsrcolors3_inv = c("#B3B3B4", "#646768", "#000000")

modality_plot = modal_w %>%
  filter(week <= "2022-10-01") %>%
  ggplot() +
  geom_area(aes(x = week, y = prop, fill = learning_modality), alpha = .7) +
  geom_rect(aes(xmin=as.Date("2021-06-01"), xmax=as.Date("2021-07-31"), ymin=-0, ymax=1), fill = "#EEEEEE") + 
  annotate("text", x= as.Date("2021-07-01"), y = .5, label = "Summer: No data", angle = 90, size = 5) +
  # geom_vline(xintercept = as.Date(in_person50$week[1]), linetype=2) +
  scale_fill_manual(values = c(apsrcolors3_inv)) +
  theme_minimal() +
  theme(legend.position = "bottom", legend.title = element_blank(), axis.text.x = element_text(angle = 55, hjust = 1), plot.title = element_text(hjust = 0.5), axis.text=element_text(size=11)) +
  labs(x = "Weeks", y = "Learning modality (proportion)", caption = "Source:CDC (2023a, 2023b).") +
  scale_x_date(date_breaks = "8 weeks", date_labels = "%m-%Y")


modality_plot


monthly = read_csv(here("data", "submissions_byyearmonth.csv"))
monthly = monthly %>%
  mutate(year_month = ym(paste0(year, month, sep = "-")))
start_man = as.Date("2018-01-01")
end_man = as.Date("2020-05-30")
start_cur = as.Date("2020-06-01")
end_cur = as.Date("2022-10-01")
m = monthly %>%
  filter(year_month >= "2018-01-01" & year_month <= "2022-10-01") %>%
  mutate(announce = if_else(year_month>"2019-07-01", 1, 0), 
         pandemic = if_else(year_month>"2020-03-01", 1, 0), 
         team = if_else(year_month>"2020-05-01", 1, 0), 
         open = if_else(year_month>"2021-08-01", 1, 0), 
         period = announce + pandemic + team + open, 
         period_f = as_factor(period), 
         period_f = fct_recode(period_f, "Reference" = "0", 
                               "Post-announcement" = "1", 
                               "Early pandemic" = "2", 
                               "New team" = "3", 
                               "AY2021-22 start" = "4"))
m1 = lm(n ~ period_f, data = m)


m1_list = list("Monthly submissions" = m1)
t1 = modelsummary::modelsummary(m1_list, stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001),
                           coef_rename = c("(Intercept)" = "Intercept (pre-announcement)", 
                                           "period_fPost-announcement" = "Post-announcement", 
                                           "period_fEarly pandemic" = "Early pandemic", 
                                           "period_fNew team" = "New team", 
                                           "period_fAY2021-22 start" = "AY2021-22+"),
                           notes = list("Linear regression coefficients (and standard errors).", 
                           "See Figure 1 in main text for coding of time periods."), 
                           title = "Number of new submissions regressed on time periods", output="kableExtra", booktabs=T)


t1 %>% column_spec(1,width="2.5in") %>% column_spec(2,width="2.5in")


by_teams = read_csv(here("data", "monthly_byteams.csv"))
team_means = by_teams %>%
  group_by(team) %>%
  summarize(team_mean = mean(n))
by_teams_plot = by_teams %>%
  filter(team == "Mannheim" | team == "Current" | team == "UNT") %>%
  ggplot() +
  geom_line(aes(y=n, x=n_month, group = team, linetype = team)) +
  labs(y = "Monthy new submissions", x = "Month in tenure", caption = "Note: The UNT (2012-16) and Mannheim (2016-20) teams began on July 1,\nwhile the Current team began on June 1, 2020.") +
  annotate("text", x= 9, y = 50, label = TeX(paste0("$\\bar{y}_{UNT}=$", round(team_means$team_mean[3], digits = 1), sep = ""))) +
  annotate("text", x= 4, y = 110, label = TeX(paste0("$\\bar{y}_{Mann.}=$", round(team_means$team_mean[2], digits = 1), sep = ""))) +
  annotate("text", x= 17, y = 170, label = TeX(paste0("$\\bar{y}_{Current}=$", round(team_means$team_mean[1], digits = 1), sep = ""))) +
  theme(legend.position="bottom", legend.title = element_blank())
# ggsave(here("figures", "by_team_monthly_means.png"), units = "in", width = 6.6, height = 4)


by_teams_plot


classification_inter = read_csv(here("data", "classification_inter.csv")) %>%
  mutate_all(as_factor)

team_eth = classification_inter %>%
  filter(Race == "Race/Ethnicity") %>%
  filter(author_team_eth_basic != "Declined") %>%
  mutate(author_team_eth_basic = fct_drop(author_team_eth_basic)) %>%
  select(author_team_eth_basic, team)
team_eth_chi = glance(chisq.test(team_eth$author_team_eth_basic, team_eth$team))
team_eth = team_eth %>%
  tabyl(author_team_eth_basic, team) %>%
  rename(`Author characteristics` = author_team_eth_basic) %>%
  as_tibble() %>%
  mutate(Classification = "Race/Ethnicity") %>%
  relocate(Classification)

team_gen = classification_inter %>%
  filter(GenderSex == "Women/\nGender/\nSexuality") %>%
  filter(author_team_gen_basic != "Declined") %>%
  mutate(author_team_gen_basic = fct_drop(author_team_gen_basic)) %>%
  select(author_team_gen_basic, team)
team_gen_chi = glance(chisq.test(team_gen$author_team_gen_basic, team_gen$team))
team_gen = team_gen %>%
  tabyl(author_team_gen_basic, team) %>%
  rename(`Author characteristics` = author_team_gen_basic) %>%
  as_tibble() %>%
  mutate(Classification = "Women/SOGI") %>%
  relocate(Classification)

team_reg = classification_inter %>%
  filter(GlobalSouth == "Global South") %>%
  select(corr_author_region3, team)
team_reg_chi = glance(chisq.test(team_reg$corr_author_region3, team_reg$team))
team_reg = team_reg %>%
  tabyl(corr_author_region3, team) %>%
  as_tibble() %>%
  mutate(Classification = "Global South") %>%
  arrange(desc(Current)) %>%
  rename(`Author characteristics` = corr_author_region3) %>%
  relocate(Classification)


team_reg %>%
  bind_rows(team_gen, team_eth) %>%
  mutate(`Percent change` = (Current-Mannheim)/Mannheim*100, 
         `Author characteristics` = str_replace_all(`Author characteristics`, "\n", " "), 
         `Author characteristics` = str_replace(`Author characteristics`, "Women/ Non-binary", "Women/Non-binary")) %>%
  as_tibble() %>% 
  kable(format = "simple", digits = 1, caption = "New submission author characteristics by substantive topics & editorial team, 2018-2022")


lm1 = glm(GenderSex ~ author_team_gen_basic * team, data = classification_inter, binomial(link = "logit"))
lm2 = glm(Race ~ author_team_eth_basic * team, data = classification_inter, binomial(link = "logit"))
lm3 = glm(GlobalSouth ~ corr_author_region3 * team, data = classification_inter, binomial(link = "logit"))
model_list = list("Outcome:\nGlobal\nSouth" = lm3, 
                  "Outcome:\nWomen,\nGender &\nSexuality" = lm1,
                  "Outcome:\nRace/\nEthnicity" = lm2)

model_coeff <- c("teamCurrent"    = "Current team",
        "corr_author_region3Europe"    = 'Europe',
        "corr_author_region3Other"    = 'Other region',
        "corr_author_region3Europe:teamCurrent" = "Europe × Current", 
        "corr_author_region3Other:teamCurrent" = "Other x Current", 
        "author_team_gen_basicWomen/\nNon-binary" = "Women/Non-binary", 
        "author_team_gen_basicMixed\nteam" = "Mixed team", 
        "author_team_gen_basicDeclined" = "Declined gender", 
        "author_team_gen_basicWomen/\nNon-binary:teamCurrent" = "Women/Non-binary × Current", 
        "author_team_gen_basicMixed\nteam:teamCurrent" = "Mixed team × Current", 
        "author_team_gen_basicDeclined:teamCurrent" = "Declined gender × Current", 
        "author_team_eth_basic1+\nAuthor(s)\nof color" ="1+ Author(s) of color", 
        "author_team_eth_basicDeclined" = "Race/Ethnicity declined", 
        "author_team_eth_basic1+\nAuthor(s)\nof color:teamCurrent" = "1+ Author(s) of color × Current", 
        "author_team_eth_basicDeclined:teamCurrent" ="Race/ethnicity declined × Current",
        "(Intercept)" = "Constant")

# modelsummary::modelsummary(model_list, stars = TRUE, )
t2 = modelsummary::modelsummary(model_list, stars = c("*" = 0.05, "**" = 0.01, "***" = 0.001), coef_map = model_coeff, 
                                title = "Classification regressed on author characteristics", 
                                notes = list("Logistic regression coefficients (and standard errors).", 
                                             "See main text for coding of categories."), 
                                output="kableExtra", booktabs=T)




t2 %>% column_spec(1,width="2.5in") %>% column_spec(2:4,width="1.0in")

